Pena
# Ten cars with the fewest forward gears: sort mtcars by `gear` (ascending)
# and keep the first ten rows. dplyr is called through its namespace and the
# native pipe is used, so this chunk does not depend on a library() call that
# only appears further down the script.
small_mtcars <-
  mtcars |>
  dplyr::arrange(gear) |>
  dplyr::slice_head(n = 10)
small_mtcars
library(ggplot2)
# Scatter plot of car weight against fuel economy with a fitted linear trend.
ggplot(data = mtcars, mapping = aes(wt, mpg)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

#install.packages("pacman")
library(pacman)
## Warning: package 'pacman' was built under R version 4.2.1
p_load(ggplot2, ggthemes, dplyr, readr)
# Chilean export figures embedded as literal CSV text. Columns: year, product
# (copper / others), export, percentage.
chilean_exports <- "year,product,export,percentage
2006,copper,4335009500,81
2006,others,1016726518,19
2007,copper,9005361914,86
2007,others,1523085299,14
2008,copper,6907056354,80
2008,others,1762684216,20
2009,copper,10529811075,81
2009,others,2464094241,19
2010,copper,14828284450,85
2010,others,2543015596,15
2011,copper,15291679086,82
2011,others,3447972354,18
2012,copper,14630686732,80
2012,others,3583968218,20
2013,copper,15244038840,79
2013,others,4051281128,21
2014,copper,14703374241,78
2014,others,4251484600,22
2015,copper,13155922363,78
2015,others,3667286912,22"
# Parse the embedded CSV text into a data frame. read_csv() treats a string
# containing newlines as literal data rather than as a file path.
exports_data <- read_csv(chilean_exports)
## Rows: 20 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): product
## dbl (3): year, export, percentage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the parsed export table.
exports_data
# Line chart of export value per year, one coloured line per product.
p1 <- ggplot(
  data = exports_data,
  mapping = aes(x = year, y = export, colour = product)
) +
  geom_line()
p1

library(reticulate)
## Warning: package 'reticulate' was built under R version 4.2.1
# Install NumPy into reticulate's conda environment ("r-reticulate" in the
# logged command below).
reticulate::conda_install(packages = "numpy")
## + "C:/Users/jipm1/AppData/Local/r-miniconda/condabin/conda.bat" "install" "--yes" "--name" "r-reticulate" "-c" "conda-forge" "numpy"
# Bind the NumPy module to `np`. The Python statement `import numpy as np` is
# not valid R syntax and stops the script from parsing; reticulate::import()
# is the R-side equivalent.
np <- reticulate::import("numpy")
library(readxl)
# Load the second sheet of the survey workbook.
CEP <- readxl::read_excel(path = "D:/CEP_sep-oct_2017.xlsx", sheet = 2)
# Preview the first rows.
head(CEP)
library(dplyr)
# Keep only the survey variables used below and give them descriptive
# lower-case names.
CEP1 <- CEP %>%
  select(
    pond = POND,
    sexo = SEXO,
    region = REGION,
    edad = DS_P2_EXACTA,
    satisfaccion_vida = SV_1,
    satisfaccion_chilenos = SV_2,
    eval_econ = MB_P2
  )
CEP1
# `sexo` is stored as numeric codes: check its type and the count per code.
class(CEP1[["sexo"]])
## [1] "numeric"
table(CEP1[["sexo"]])
##
## 1 2
## 553 871
library(dplyr)
# Add a character version of `sexo`: code 1 becomes "hombre", code 2 "mujer".
CEP2 <- CEP1 %>%
  mutate(sexo_chr = dplyr::recode(sexo, `1` = "hombre", `2` = "mujer"))
# Tabulate the recoded labels.
table(CEP2[["sexo_chr"]])
##
## hombre mujer
## 553 871
# Add a factor version of `sexo`. The levels are listed explicitly so each
# label is tied to its numeric code (1 = "Hombre", 2 = "Mujer") instead of to
# whatever sorted unique values happen to be present in the column.
CEP3 <- mutate(
  CEP2,
  sexo_factor = factor(sexo, levels = c(1, 2), labels = c("Hombre", "Mujer"))
)
class(CEP3$sexo_factor)
## [1] "factor"
# Number of respondents per region code.
table(CEP3[["region"]])
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 24 57 24 52 150 82 94 192 98 69 5 17 501 39 20
library(knitr)
# The same frequency table rendered as a Markdown table.
knitr::kable(table(CEP3[["region"]]))
## |Var1 | Freq|
## |:----|----:|
## |1    |   24|
## |2    |   57|
## |3    |   24|
## |4    |   52|
## |5    |  150|
## |6    |   82|
## |7    |   94|
## |8    |  192|
## |9    |   98|
## |10   |   69|
## |11   |    5|
## |12   |   17|
## |13   |  501|
## |14   |   39|
## |15   |   20|
# `region` is still stored as numeric codes at this point.
class(CEP3$region)
## [1] "numeric"
library(car)
## Warning: package 'car' was built under R version 4.2.1
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.2.1
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Collapse the 15 region codes into two groups: code 13 becomes 2 and every
# other code (1-12, 14-15) becomes 1. `as.factor = TRUE` makes car::recode()
# return a factor, so the column's type matches its name `region_factor`;
# without it the result is a plain numeric vector.
CEP <- mutate(
  CEP3,
  region_factor = car::recode(
    region,
    "1:12 = 1; 13 = 2; 14:15 = 1",
    as.factor = TRUE
  )
)
class(CEP$region_factor)
## [1] "factor"
library(VIM)
## Warning: package 'VIM' was built under R version 4.2.1
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(ggplot2)
# Preview the msleep data before imputation.
head(msleep)
# Impute missing values with k-nearest neighbours (k = 5).
newdata <- VIM::kNN(data = msleep, k = 5)
# Preview the imputed result.
head(newdata)
% Please add the following required packages to your document preamble:
%
% Note: It may be necessary to compile the document several times to get
% a multi-page table to line up properly
library(dlookr) # for exploratory data analysis and imputation
## Warning: package 'dlookr' was built under R version 4.2.1
##
## Attaching package: 'dlookr'
## The following object is masked from 'package:base':
##
## transform
# Pareto chart of missing-value counts, limited to variables that have NAs.
dlookr::plot_na_pareto(airquality, only_na = TRUE)

# Plot the combinations of variables that are missing together.
dlookr::plot_na_intersect(airquality)

library(visdat) # for visualizing NAs
## Warning: package 'visdat' was built under R version 4.2.1
library(plotly) # for interactive visualization
## Warning: package 'plotly' was built under R version 4.2.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Draw the missingness map of airquality and convert it to an interactive
# plotly widget.
plotly::ggplotly(visdat::vis_miss(airquality))
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Impute the missing Ozone values with the mean method of dlookr's
# imputate_na().
blip <- imputate_na(
  .data = airquality,
  xvar = Ozone,
  yvar = Solar.R,
  method = "mean"
)
# Plot the imputation object.
plot(blip)

# Print the imputed vector together with its imputation attributes.
blip
## [1] 41.00000 36.00000 12.00000 18.00000 42.12931 28.00000 23.00000
## [8] 19.00000 8.00000 42.12931 7.00000 16.00000 11.00000 14.00000
## [15] 18.00000 14.00000 34.00000 6.00000 30.00000 11.00000 1.00000
## [22] 11.00000 4.00000 32.00000 42.12931 42.12931 42.12931 23.00000
## [29] 45.00000 115.00000 37.00000 42.12931 42.12931 42.12931 42.12931
## [36] 42.12931 42.12931 29.00000 42.12931 71.00000 39.00000 42.12931
## [43] 42.12931 23.00000 42.12931 42.12931 21.00000 37.00000 20.00000
## [50] 12.00000 13.00000 42.12931 42.12931 42.12931 42.12931 42.12931
## [57] 42.12931 42.12931 42.12931 42.12931 42.12931 135.00000 49.00000
## [64] 32.00000 42.12931 64.00000 40.00000 77.00000 97.00000 97.00000
## [71] 85.00000 42.12931 10.00000 27.00000 42.12931 7.00000 48.00000
## [78] 35.00000 61.00000 79.00000 63.00000 16.00000 42.12931 42.12931
## [85] 80.00000 108.00000 20.00000 52.00000 82.00000 50.00000 64.00000
## [92] 59.00000 39.00000 9.00000 16.00000 78.00000 35.00000 66.00000
## [99] 122.00000 89.00000 110.00000 42.12931 42.12931 44.00000 28.00000
## [106] 65.00000 42.12931 22.00000 59.00000 23.00000 31.00000 44.00000
## [113] 21.00000 9.00000 42.12931 45.00000 168.00000 73.00000 42.12931
## [120] 76.00000 118.00000 84.00000 85.00000 96.00000 78.00000 73.00000
## [127] 91.00000 47.00000 32.00000 20.00000 23.00000 21.00000 24.00000
## [134] 44.00000 21.00000 28.00000 9.00000 13.00000 46.00000 18.00000
## [141] 13.00000 24.00000 16.00000 13.00000 23.00000 36.00000 7.00000
## [148] 14.00000 30.00000 42.12931 14.00000 18.00000 20.00000
## attr(,"var_type")
## [1] "numerical"
## attr(,"method")
## [1] "mean"
## attr(,"na_pos")
## [1] 5 10 25 26 27 32 33 34 35 36 37 39 42 43 45 46 52 53 54
## [20] 55 56 57 58 59 60 61 65 72 75 83 84 102 103 107 115 119 150
## attr(,"type")
## [1] "missing values"
## attr(,"message")
## [1] "complete imputation"
## attr(,"success")
## [1] TRUE
## attr(,"class")
## [1] "imputation" "numeric"